library(gridExtra)
library(dplyr)
library(viridis)
library(ggmap)
library(plyr)
library(ggplot2)
library(usmap)
library(leaflet)
library(mapproj)
library(dlookr)
library(stringr)
library(kableExtra)
# Load the USArrests data set into the workspace.
data("USArrests")

# Preview the first six and last six rows (one row per state) as a striped
# table. The caption says "rows" because headTail() slices rows, not columns.
# TRUE is spelled out: T is an ordinary variable that can be reassigned.
psych::headTail(USArrests, top = 6, bottom = 6) %>%
  kbl(caption = "First and last 6 rows of the data", booktabs = TRUE) %>%
  kable_styling(latex_options = "striped", font_size = 10)
First and last 6 variables in the data

|               | Murder | Assault | UrbanPop | Rape |
|---------------|--------|---------|----------|------|
| Alabama       | 13.2   | 236     | 58       | 21.2 |
| Alaska        | 10     | 263     | 48       | 44.5 |
| Arizona       | 8.1    | 294     | 80       | 31   |
| Arkansas      | 8.8    | 190     | 50       | 19.5 |
| California    | 9      | 276     | 91       | 40.6 |
| Colorado      | 7.9    | 204     | 78       | 38.7 |
| …             | …      | …       | …        | …    |
| Vermont       | 2.2    | 48      | 32       | 11.2 |
| Virginia      | 8.5    | 156     | 63       | 20.7 |
| Washington    | 4      | 145     | 73       | 26.2 |
| West Virginia | 5.7    | 81      | 39       | 9.3  |
| Wisconsin     | 2.6    | 53      | 66       | 10.8 |
| Wyoming       | 6.8    | 161     | 60       | 15.6 |
# Compact structure listing: dimensions plus the type and first values of
# each column.
utils::str(object = USArrests)
## 'data.frame': 50 obs. of 4 variables:
## $ Murder : num 13.2 10 8.1 8.8 9 7.9 3.3 5.9 15.4 17.4 ...
## $ Assault : int 236 263 294 190 276 204 110 238 335 211 ...
## $ UrbanPop: int 58 48 80 50 91 78 77 72 80 60 ...
## $ Rape : num 21.2 44.5 31 19.5 40.6 38.7 11.1 15.8 31.9 25.8 ...
# Column-wise preview of the data frame (row/column counts, then one line
# per column).
dplyr::glimpse(USArrests)
## Rows: 50
## Columns: 4
## $ Murder <dbl> 13.2, 10.0, 8.1, 8.8, 9.0, 7.9, 3.3, 5.9, 15.4, 17.4, 5.3, 2…
## $ Assault <int> 236, 263, 294, 190, 276, 204, 110, 238, 335, 211, 46, 120, 2…
## $ UrbanPop <int> 58, 48, 80, 50, 91, 78, 77, 72, 80, 60, 83, 54, 83, 65, 57, …
## $ Rape <dbl> 21.2, 44.5, 31.0, 19.5, 40.6, 38.7, 11.1, 15.8, 31.9, 25.8, …
# Min / quartiles / median / mean / max for each column.
base::summary(object = USArrests)
## Murder Assault UrbanPop Rape
## Min. : 0.800 Min. : 45.0 Min. :32.00 Min. : 7.30
## 1st Qu.: 4.075 1st Qu.:109.0 1st Qu.:54.50 1st Qu.:15.07
## Median : 7.250 Median :159.0 Median :66.00 Median :20.10
## Mean : 7.788 Mean :170.8 Mean :65.54 Mean :21.23
## 3rd Qu.:11.250 3rd Qu.:249.0 3rd Qu.:77.75 3rd Qu.:26.18
## Max. :17.400 Max. :337.0 Max. :91.00 Max. :46.00
library(funModeling)

# Extended numeric profile of every column: mean, standard deviation,
# percentiles, skewness, kurtosis, IQR and the 98% / 80% ranges.
funModeling::profiling_num(USArrests)
## variable mean std_dev variation_coef p_01 p_05 p_25 p_50
## 1 Murder 7.788 4.355510 0.5592591 1.437 2.145 4.075 7.25
## 2 Assault 170.760 83.337661 0.4880397 45.490 50.250 109.000 159.00
## 3 UrbanPop 65.540 14.474763 0.2208539 35.430 44.000 54.500 66.00
## 4 Rape 21.232 9.366385 0.4411447 7.545 8.750 15.075 20.10
## p_75 p_95 p_99 skewness kurtosis iqr range_98
## 1 11.250 15.400 16.763 0.3820378 2.135329 7.175 [1.437, 16.763]
## 2 249.000 297.300 336.020 0.2273179 1.930980 140.000 [45.49, 336.02]
## 3 77.750 86.550 90.020 -0.2191719 2.215790 23.250 [35.43, 90.02]
## 4 26.175 39.745 45.265 0.7769613 3.201898 11.100 [7.545, 45.265]
## range_80
## 1 [2.56, 13.32]
## 2 [56.9, 279.6]
## 3 [45, 83.2]
## 4 [10.67, 32.4]
library(naniar)

# Visual overview of missing values across the data frame.
naniar::vis_miss(USArrests)

# Outlier diagnostic plots for the columns of USArrests.
dlookr::plot_outlier(USArrests)

# Normality diagnostic plots for the four columns, named explicitly.
dlookr::plot_normality(USArrests, Murder, Assault, UrbanPop, Rape)




library(maps)

# Fetch the state outlines once and reuse them for both the polygons and the
# plot limits, instead of calling map_data("state") three times.
states_map <- map_data("state")

# The state names are the row names of USArrests; they are lower-cased so
# they can serve as the map_id key against the map data.
crimes <- data.frame(state = tolower(rownames(USArrests)), USArrests)

# Choropleth: each state polygon is filled by its Murder value.
gg <- ggplot(crimes, aes(map_id = state, fill = Murder)) +
  geom_map(map = states_map) +
  expand_limits(x = states_map$long, y = states_map$lat)

gg +
  labs(
    title = "Murder rates per 100,000 in 1973",
    x = "longitude",
    y = "latitude"
  ) +
  scale_fill_distiller(palette = "Reds", limits = c(0, 18))

# Horizontal bar chart of the Murder column, one bar per state.
# The state names live in the row names, so they are copied into a real
# column instead of being referenced from the global data frame inside aes().
murder_by_state <- data.frame(
  State = rownames(USArrests),
  Murder = USArrests$Murder
)

ggplot(murder_by_state, aes(x = State, y = Murder)) +
  # geom_col() is the direct form of geom_bar(stat = "identity"); there is a
  # single bar per state, so no dodging is needed.
  geom_col(color = "darkblue", fill = "darkorange") +
  # Print each value just past the end of its bar.
  geom_text(
    aes(label = Murder),
    vjust = 0.5, hjust = -0.1,
    color = "black", size = 2, fontface = "bold"
  ) +
  theme_grey() +
  # The rows are states, not cities, and the values are rates per 100,000
  # (see the map title above), so the title says so.
  labs(title = "Murder rate per 100,000 in each state", x = "State") +
  coord_flip()

library(ggrepel)

# Murder against Assault, one labelled point per state, with a fitted
# straight line (no confidence band). Labels are coloured per state, so the
# legend is switched off.
murder_assault_plot <- ggplot(
  data = USArrests,
  mapping = aes(x = Murder, y = Assault)
) +
  geom_point() +
  geom_label_repel(
    mapping = aes(
      label = rownames(USArrests),
      color = rownames(USArrests)
    ),
    fontface = "bold"
  ) +
  theme(legend.position = "none") +
  geom_smooth(method = lm, se = FALSE) +
  labs(title = "Scatter plot of murder and assault for each state")

murder_assault_plot

library(ggplot2)
library(ggExtra)

# UrbanPop against Rape with a fitted straight line (no confidence band).
# The title and subtitle are nudged via vjust in the theme.
p1 <- ggplot(USArrests, aes(x = UrbanPop, y = Rape)) +
  geom_point(size = 2, color = "darkred") +
  # FALSE is spelled out: F is an ordinary variable that can be reassigned.
  geom_smooth(method = lm, se = FALSE) +
  labs(
    title = "Scatter plot of Urban population and rape",
    # Fixed the missing space in "betweenpopulation".
    subtitle = "Relationship between population and rape in 1973"
  ) +
  theme(
    legend.position = "bottom",
    plot.title = element_text(vjust = -1),
    plot.subtitle = element_text(vjust = -1.5)
  )

# Add marginal boxplots for both axes around the scatter plot.
ggMarginal(p1, type = "boxplot", size = 7, fill = "slateblue")

library(corrplot)
library(PerformanceAnalytics)

# Spearman rank-correlation matrix of the four columns.
res <- USArrests %>%
  cor(method = "spearman")

# Coloured correlation matrix, variables ordered by hierarchical clustering,
# coefficients printed in black, axis labels black and rotated 45 degrees.
corrplot::corrplot(
  res,
  method = "color",
  order = "hclust",
  addCoef.col = "black",
  tl.col = "black",
  tl.srt = 45
)

library(plotly)

# Line chart with one trace per USArrests column, plotted against the state
# names (the row names of the data frame).
# NOTE(review): `fig` is only assigned in this block, never printed here.
fig <- plot_ly(USArrests, x = ~rownames(USArrests)) %>%
  # Name every trace so the legend shows the variable, not "trace 0..3".
  add_lines(y = ~Rape, name = "Rape") %>%
  add_lines(y = ~Murder, name = "Murder") %>%
  add_lines(y = ~Assault, name = "Assault") %>%
  add_lines(y = ~UrbanPop, name = "UrbanPop") %>%
  layout(
    # The x axis holds states, not cities.
    title = "All of the variables for each state",
    xaxis = list(
      title = "State",
      # The x axis is categorical (state names), so the date-based range
      # selector buttons (3 mo / 6 mo / 1 yr / YTD) were removed; only the
      # range slider is kept for scrolling through the states.
      rangeslider = list(visible = TRUE)
    ),
    yaxis = list(title = "value")
  )
# Scatter plot of Assault against Murder.
# The trace type and mode are stated explicitly so plotly does not have to
# guess them (which produced the "No trace type specified" and
# "No scatter mode specifed" messages).
fig1 <- plot_ly(
  USArrests,
  x = ~Murder,
  y = ~Assault,
  type = "scatter",
  mode = "markers",
  marker = list(
    color = 'rgba(222,45,38,0.8)',
    line = list(color = 'rgb(8,48,107)', width = 1.5)
  )
) %>%
  layout(title = "Relationship between assault and murder")

fig1
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No scatter mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
## Warning: `arrange_()` was deprecated in dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
library(plotly)
data(mtcars)

# Line trace of mpg against cyl from mtcars; the x-axis title is an HTML
# link. `p` is only assigned in this block, not printed.
p <- plot_ly() %>%
  add_trace(
    data = mtcars,
    x = ~cyl, y = ~mpg,
    type = 'scatter',
    mode = 'lines'
  ) %>%
  layout(
    # TRUE is spelled out: T is an ordinary variable that can be reassigned.
    autosize = TRUE,
    title = "Test",
    xaxis = list(
      title = "<a href = 'https://www.nytimes.com/'>The NY TIMES</a>"
    )
  )
# Set the two plotly credential environment variables for this session.
# The values are placeholders; presumably they must be replaced with real
# credentials before publishing — confirm, and keep real keys out of source.
Sys.setenv(
  plotly_username = "your_plotly_username",
  plotly_api_key = "your_api_key"
)